home *** CD-ROM | disk | FTP | other *** search
- /*
- icombine: combine multiple ispell dictionary entries into a single
- entry with the options of all entries
-
- Author: Gary Puckering
- Cognos, Inc.
-
- Written: January 29, 1987
-
- Notes: Input lines consist of a word optionally followed
- by one or more flags, e.g. CREATE/V/N/S
-
- Flags on lines with identical root words are combined.
- No editing on flags is performed.
- Flags are forced to uppercase, but roots are left alone.
- Old-style flags, like /X/N will be output as /NX.
- Flags are output in alphabetical order.
- Non-letters appearing before the first "/" are retained,
- those after are dropped.
- Root words that differ only in capitalization are combined.
- */
-
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #include "config.h"
- #include "ispell.h"
-
- void putword (void);
- void parse (char ln[], char wrd[], char flgs[]);
- void getflags (char *flgs);
- void putflags (void);
- int resolvecaps (char *word, char *ucword, char *lastword, char *uclastword);
- void uccopy (char *dest, char *src);
-
/* Sizing constants. */
#define MAXFLAGS 26 /* one table slot per flag letter, A-Z */
#define MAXLINE 255 /* capacity of every line/word buffer below */

/* Home-grown boolean type. */
#define TRUE 1
#define FALSE 0
typedef int bool;

/* flagtbl[i] is TRUE when flag letter 'A' + i has been seen for lastword. */
bool flagtbl[MAXFLAGS];

/* Working buffers shared by all routines in this file. */
char line[MAXLINE],       /* input line currently being processed */
     lastword[MAXLINE],   /* root word whose flags are being accumulated */
     uclastword[MAXLINE], /* lastword folded to upper case */
     word[MAXLINE],       /* root word of the current line */
     ucword[MAXLINE],     /* word folded to upper case */
     flags[MAXLINE];      /* flag portion ("/..." onward) of the current line */

/* Non-zero selects expand mode (one flag per output line) over combining. */
int expand = 0;
-
- void main (int argc, char *argv[])
- {
-
- if (argc > 1 && strcmp (argv[1], "-e") == 0)
- expand = 1;
- if (gets (line))
- {
- parse (line, lastword, flags);
- uccopy (uclastword, lastword);
- getflags (flags);
- }
- else
- return;
-
- while (gets (line))
- {
- parse (line, word, flags);
- uccopy (ucword, word);
- if (strcmp (word, lastword) != 0) /* possibly different word */
- {
- if (strcmp (ucword, uclastword) != 0 /* truly different word */
- || resolvecaps (word, ucword, lastword, uclastword))
- { /* or caps differ */
- putword ();
- strcpy (lastword, word);
- strcpy (uclastword, ucword);
- }
- }
- getflags (flags);
- }
- putword ();
- return;
- }
-
- void putword (void)
- {
- printf ("%s", lastword);
- putflags ();
- }
-
/*
 * Split an input line at its first '/'.
 *
 * ln    - NUL-terminated input line (not modified)
 * wrd   - receives everything before the first '/', or the whole line when
 *         there is none; must be able to hold strlen(ln) + 1 bytes
 * flgs  - receives the remainder starting at the '/', or "" when there is
 *         none; must be able to hold strlen(ln) + 1 bytes
 */
void parse (char ln[], char wrd[], char flgs[])
{
    register char *p, *q;

    /* copy line up to first "/" or to end */
    for (p = ln, q = wrd; *p && *p != '/'; p++, q++)
        *q = *p;
    *q = '\0';                  /* a character terminator, not the NULL pointer */

    strcpy (flgs, p);           /* copy from "/" to end */
}
-
- void getflags (char *flgs)
- {
- register char *p;
-
- for (p = flgs; *p; p++)
- if (*p != '/')
- {
- if (islower (*p))
- *p = toupper (*p);
- if (isupper (*p))
- flagtbl[(*p) - 'A'] = TRUE;
- }
- }
-
- void putflags (void)
- {
- register int i;
- int slashout = 0;
-
- if (expand)
- putchar ('\n');
-
- for (i = 0; i < MAXFLAGS; i++)
- if (flagtbl[i])
- {
- if (expand)
- printf ("%s/%c\n", lastword, i + 'A');
- else
- {
- if (!slashout)
- putchar ('/');
- slashout = 1;
- putchar (i + 'A');
- }
- flagtbl[i] = FALSE;
- }
- if (!expand)
- putchar ('\n');
- }
-
- /*
- * This routine resolves capitalization conflicts. The idea is to combine
- * only those cases that ispell can "uncombine".
- *
- * Entry: word and lastword differ, but only by case.
- *
- * Exit: Returns 1 if word and lastword both need to be in the dictionary,
- * 0 if they can be handled by a single entry. If the return is zero,
- * lastword may have been modified to reflect the union of the two
- * entries.
- *
- * Rules:
- *
- * (1) If either word is entirely in upper case, it "loses" to the other
- * word. The "winning" word is copied to lastword, and 0 is returned.
- * (2) If either word is "followcase" (defined as being mixed case with a
- * capital letter appearing after the first character), the two
- * variants are considered to differ, and 1 is returned. Furthermore,
- * a flag is set (by copying the word to "lastfollow") so that all
- * future variants of the word will be considered to differ.
- * (3) If one word is capitalized and the other is all-lowercase, the
- * lowercase word "wins". It is copied to lastword, and 0 is returned.
- * HOWEVER, if a "followcase" variant of the word has been seen, this
- * rule does not apply, and rule (4) will cause the words to be
- * considered different.
- * (4) If a "followcase" variant of the word has been seen, the words are
- * always considered to differ. 1 is returned.
- *
- * Note that the input must be sorted with "sort -t/ +0f -1 +0 -1" for this
- * code to work.
- */
/*
 * Decide whether two spellings that differ only in case need separate
 * dictionary entries.
 *
 * word, ucword          - current word and its upper-cased form
 * lastword, uclastword  - pending word and its upper-cased form; both are
 *                         overwritten with word/ucword when the current
 *                         word "wins" the merge
 *
 * Returns 1 when both spellings must be kept, 0 when they are merged into
 * lastword.
 */
int resolvecaps (char *word, char *ucword, char *lastword, char *uclastword)
{
    register char *w;
    register char *lw;
    /*
     * Upper-cased form of the last word for which a "followcase" variant
     * was seen.  Only the first sizeof lastfollow - 1 characters are kept,
     * so a longer word is remembered by that prefix instead of overrunning
     * the buffer.  A prefix match can only make Rule (4) fire more often,
     * which keeps entries separate and never merges them wrongly.
     */
    static char lastfollow[200] = "";

    /* Rule (1): Upper case loses */
    for (w = word; *w && !mylower (*w); w++)
        ;
    if (*w == '\0')
        return 0;               /* word has no lower-case letter: keep lastword */
    for (lw = lastword; *lw && !mylower (*lw); lw++)
        ;
    if (*lw == '\0')
    {
        /* lastword has no lower-case letter: word replaces it */
        strcpy (lastword, word);
        strcpy (uclastword, ucword);
        return 0;
    }
    /* Rule (4): followcase forces all subsequent variants to be different. */
    if (strncmp (ucword, lastfollow, sizeof lastfollow - 1) == 0)
        return 1;
    /* Rule (2): "followcase" is different. */
    for (w = word + 1, lw = lastword + 1;
         *w && !myupper (*w) && !myupper (*lw);
         w++, lw++)
        ;
    if (*w)                     /* We don't test *lw 'cause lengths are the same */
    {
        /* Bounded copy: ucword may be longer than lastfollow. */
        strncpy (lastfollow, ucword, sizeof lastfollow - 1);
        lastfollow[sizeof lastfollow - 1] = '\0';
        return 1;
    }
    /* Rule (3): all-lowercase beats capitalized */
    if (myupper (lastword[0]))
    {
        strcpy (lastword, word);
        strcpy (uclastword, ucword);
    }
    return 0;
}
-
/*
 * Copy src to dest, upper-casing each character for which mylower() is
 * true and copying every other character unchanged.
 *
 * dest - destination buffer; must hold at least strlen(src) + 1 bytes
 * src  - NUL-terminated source string (not modified)
 *
 * NOTE(review): mylower() is not defined in this file; presumably it comes
 * from ispell.h -- confirm it accepts a plain (possibly negative) char.
 */
void uccopy (char *dest, char *src)
{
    for (; *src; src++)
    {
        if (mylower (*src))
            /* toupper() requires a value representable as unsigned char */
            *dest++ = (char) toupper ((unsigned char) *src);
        else
            *dest++ = *src;
    }
    *dest = '\0';
}
-